{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-17T13:26:00.016417Z",
     "start_time": "2020-11-17T13:25:59.291574Z"
    }
   },
   "outputs": [],
   "source": [
    "# Standard library\n",
    "import os\n",
    "import warnings\n",
    "from collections import Counter\n",
    "\n",
    "# Third-party\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "from scipy import stats\n",
    "\n",
    "# Display / plotting configuration (kept in the original order)\n",
    "np.set_printoptions(precision=2)\n",
    "sns.set_style('ticks')\n",
    "\n",
    "%matplotlib inline\n",
    "# NOTE: this hides every warning raised from here on, deprecation notices included.\n",
    "warnings.filterwarnings('ignore')\n",
    "mpl.rcParams['figure.dpi'] = 300\n",
    "mpl.rc('savefig', dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Read files and select drugs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-17T13:26:00.021357Z",
     "start_time": "2020-11-17T13:26:00.018213Z"
    }
   },
   "outputs": [],
   "source": [
    "# Column of the drug table that the predictions are compared against.\n",
    "# Options: log2_median_ic50_3f_hn | log2_median_ic50_hn\n",
    "ref_type = 'log2_median_ic50_hn'\n",
    "\n",
    "# Model whose '{model_name}_pred.csv' predictions are loaded; also part of the output file name.\n",
    "model_name = 'RWEN'\n",
    "\n",
    "# Per patient, a cell cluster with a frequency below 5% is not considered.\n",
    "# NOTE(review): not referenced by any other cell shown in this notebook.\n",
    "freq_cutoff = 0.05\n",
    "\n",
    "# Whether to shift the dosage, as the GDSC experiment (Syto60) is less sensitive.\n",
    "dosage_shifted = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-17T13:26:00.034352Z",
     "start_time": "2020-11-17T13:26:00.023614Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(81, 27)\n"
     ]
    }
   ],
   "source": [
    "# Drug table, indexed by its first CSV column (the drug ID).\n",
    "drug_stat_path = '../preprocessed_data/GDSC/hn_drug_stat.csv'\n",
    "drug_info_df = pd.read_csv(drug_stat_path, index_col=0)\n",
    "\n",
    "# Drug IDs are handled as strings from here on.\n",
    "drug_info_df.index = drug_info_df.index.astype(str)\n",
    "\n",
    "# Lookup: drug ID (str) -> value of the 'Drug Name' column.\n",
    "drug_id_name_dict = {\n",
    "    drug_id: drug_name\n",
    "    for drug_id, drug_name in zip(drug_info_df.index, drug_info_df['Drug Name'])\n",
    "}\n",
    "print(drug_info_df.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-17T13:26:00.043067Z",
     "start_time": "2020-11-17T13:26:00.036354Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tested_drug_list = [1032, 1007, 133, 201, 1010] + [182, 301, 302]\n",
    "\n",
    "# Cast the drug-table index to int once, rather than once per tested drug.\n",
    "known_drug_ids = drug_info_df.index.astype(int)\n",
    "\n",
    "# Tested drugs that are absent from the drug table; an empty list means none are missing.\n",
    "# NOTE(review): the prediction table displayed further down has a 1012 column but no\n",
    "# 1032 column -- confirm that 1032 is the intended drug ID here.\n",
    "[d for d in tested_drug_list if d not in known_drug_ids]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Read predicted IC50"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-17T13:26:00.047482Z",
     "start_time": "2020-11-17T13:26:00.044930Z"
    }
   },
   "outputs": [],
   "source": [
    "# Selects the '../result/HN_model/{norm_type}/' directory that is read from and written to.\n",
    "norm_type = \"patient_TPM\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-17T13:26:00.056856Z",
     "start_time": "2020-11-17T13:26:00.049403Z"
    }
   },
   "outputs": [],
   "source": [
    "# Result directory for the selected norm_type; the output CSV is saved here as well.\n",
    "out_dir = '../result/HN_model/{}/'.format(norm_type)\n",
    "\n",
    "# Prediction table of the selected model, indexed by its first CSV column.\n",
    "cadrres_patient_df = pd.read_csv(out_dir + '{}_pred.csv'.format(model_name), index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-17T13:26:00.075128Z",
     "start_time": "2020-11-17T13:26:00.059562Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>1007</th>\n",
       "      <th>133</th>\n",
       "      <th>201</th>\n",
       "      <th>1010</th>\n",
       "      <th>182</th>\n",
       "      <th>301</th>\n",
       "      <th>302</th>\n",
       "      <th>1012</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>patient_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>HN120</th>\n",
       "      <td>-9.944659</td>\n",
       "      <td>-5.930867</td>\n",
       "      <td>-12.221588</td>\n",
       "      <td>0.434450</td>\n",
       "      <td>-4.659462</td>\n",
       "      <td>0.542657</td>\n",
       "      <td>-2.792519</td>\n",
       "      <td>-0.559059</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HN137</th>\n",
       "      <td>-9.896650</td>\n",
       "      <td>-5.913446</td>\n",
       "      <td>-12.231972</td>\n",
       "      <td>-0.295715</td>\n",
       "      <td>-4.489943</td>\n",
       "      <td>-1.005280</td>\n",
       "      <td>-2.966661</td>\n",
       "      <td>-0.648973</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HN148</th>\n",
       "      <td>-9.928479</td>\n",
       "      <td>-5.837932</td>\n",
       "      <td>-12.183435</td>\n",
       "      <td>0.321664</td>\n",
       "      <td>-3.592695</td>\n",
       "      <td>1.001940</td>\n",
       "      <td>-1.491380</td>\n",
       "      <td>-0.820310</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HN159</th>\n",
       "      <td>-8.255876</td>\n",
       "      <td>-5.912368</td>\n",
       "      <td>-10.056112</td>\n",
       "      <td>0.803703</td>\n",
       "      <td>-3.580523</td>\n",
       "      <td>1.039160</td>\n",
       "      <td>-2.348676</td>\n",
       "      <td>-0.867052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HN160</th>\n",
       "      <td>-8.660189</td>\n",
       "      <td>-6.011195</td>\n",
       "      <td>-10.082678</td>\n",
       "      <td>1.718468</td>\n",
       "      <td>-2.087583</td>\n",
       "      <td>3.158456</td>\n",
       "      <td>1.940603</td>\n",
       "      <td>0.108160</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                1007       133        201      1010       182       301  \\\n",
       "patient_id                                                                \n",
       "HN120      -9.944659 -5.930867 -12.221588  0.434450 -4.659462  0.542657   \n",
       "HN137      -9.896650 -5.913446 -12.231972 -0.295715 -4.489943 -1.005280   \n",
       "HN148      -9.928479 -5.837932 -12.183435  0.321664 -3.592695  1.001940   \n",
       "HN159      -8.255876 -5.912368 -10.056112  0.803703 -3.580523  1.039160   \n",
       "HN160      -8.660189 -6.011195 -10.082678  1.718468 -2.087583  3.158456   \n",
       "\n",
       "                 302      1012  \n",
       "patient_id                      \n",
       "HN120      -2.792519 -0.559059  \n",
       "HN137      -2.966661 -0.648973  \n",
       "HN148      -1.491380 -0.820310  \n",
       "HN159      -2.348676 -0.867052  \n",
       "HN160       1.940603  0.108160  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the prediction table.\n",
    "cadrres_patient_df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-17T13:26:00.086107Z",
     "start_time": "2020-11-17T13:26:00.077765Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8 6\n"
     ]
    }
   ],
   "source": [
    "# Drugs are the columns and patients the rows of the prediction table.\n",
    "drug_list, patient_list = cadrres_patient_df.columns, cadrres_patient_df.index\n",
    "print(len(drug_list), len(patient_list))\n",
    "\n",
    "# Bring the drug table into the same drug order as the predictions; the\n",
    "# position-based arithmetic in a later cell relies on this ordering.\n",
    "drug_info_df = drug_info_df.loc[drug_list]\n",
    "cadrres_patient_df = cadrres_patient_df.loc[:, drug_list]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-17T13:26:00.091698Z",
     "start_time": "2020-11-17T13:26:00.088010Z"
    }
   },
   "outputs": [],
   "source": [
    "if dosage_shifted:\n",
    "    # The predictions are on a log2 scale (they are compared with a log2 reference\n",
    "    # column and passed through 2**x in a later cell), so subtracting 2 lowers each\n",
    "    # predicted concentration 4-fold; it is not an absolute shift of 4 uM.\n",
    "    # NOTE: re-running this cell applies the shift again.\n",
    "    log2_dosage_shift = 2\n",
    "    cadrres_patient_df = cadrres_patient_df - log2_dosage_shift"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### List all pairs of patient and drug"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-17T13:26:00.113901Z",
     "start_time": "2020-11-17T13:26:00.094110Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>1007</th>\n",
       "      <th>133</th>\n",
       "      <th>201</th>\n",
       "      <th>1010</th>\n",
       "      <th>182</th>\n",
       "      <th>301</th>\n",
       "      <th>302</th>\n",
       "      <th>1012</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>patient_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>HN120</th>\n",
       "      <td>52.625667</td>\n",
       "      <td>89.675468</td>\n",
       "      <td>92.858719</td>\n",
       "      <td>22.197506</td>\n",
       "      <td>76.475047</td>\n",
       "      <td>89.318748</td>\n",
       "      <td>98.294662</td>\n",
       "      <td>70.691052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HN137</th>\n",
       "      <td>51.795380</td>\n",
       "      <td>89.563133</td>\n",
       "      <td>92.906302</td>\n",
       "      <td>32.123949</td>\n",
       "      <td>74.295821</td>\n",
       "      <td>96.070874</td>\n",
       "      <td>98.485631</td>\n",
       "      <td>71.965477</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HN148</th>\n",
       "      <td>52.345992</td>\n",
       "      <td>89.063629</td>\n",
       "      <td>92.681351</td>\n",
       "      <td>23.576926</td>\n",
       "      <td>60.813421</td>\n",
       "      <td>85.880138</td>\n",
       "      <td>95.900024</td>\n",
       "      <td>74.297931</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HN159</th>\n",
       "      <td>25.626897</td>\n",
       "      <td>89.556142</td>\n",
       "      <td>74.349112</td>\n",
       "      <td>18.091801</td>\n",
       "      <td>60.612177</td>\n",
       "      <td>85.564395</td>\n",
       "      <td>97.694523</td>\n",
       "      <td>74.911745</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HN160</th>\n",
       "      <td>31.319958</td>\n",
       "      <td>90.179706</td>\n",
       "      <td>74.698711</td>\n",
       "      <td>10.487380</td>\n",
       "      <td>35.347757</td>\n",
       "      <td>57.702864</td>\n",
       "      <td>68.426803</td>\n",
       "      <td>60.299148</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 1007        133        201       1010        182        301  \\\n",
       "patient_id                                                                     \n",
       "HN120       52.625667  89.675468  92.858719  22.197506  76.475047  89.318748   \n",
       "HN137       51.795380  89.563133  92.906302  32.123949  74.295821  96.070874   \n",
       "HN148       52.345992  89.063629  92.681351  23.576926  60.813421  85.880138   \n",
       "HN159       25.626897  89.556142  74.349112  18.091801  60.612177  85.564395   \n",
       "HN160       31.319958  90.179706  74.698711  10.487380  35.347757  57.702864   \n",
       "\n",
       "                  302       1012  \n",
       "patient_id                        \n",
       "HN120       98.294662  70.691052  \n",
       "HN137       98.485631  71.965477  \n",
       "HN148       95.900024  74.297931  \n",
       "HN159       97.694523  74.911745  \n",
       "HN160       68.426803  60.299148  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Offset of every prediction from the reference value of its drug.\n",
    "# The reference vector is subtracted by position, so its order must follow drug_list.\n",
    "reference_values = drug_info_df[ref_type].values\n",
    "pred_delta_df = pd.DataFrame(\n",
    "    cadrres_patient_df.values - reference_values,\n",
    "    index=patient_list,\n",
    "    columns=drug_list,\n",
    ")\n",
    "\n",
    "# 100 / (1 + 2^-delta); 'cv' presumably stands for percent cell viability -- TODO confirm.\n",
    "pred_cv_df = 100 / (1 + np.power(2, -pred_delta_df))\n",
    "# The kill value is the remainder up to 100.\n",
    "pred_kill_df = 100 - pred_cv_df\n",
    "\n",
    "pred_kill_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-17T13:26:00.140435Z",
     "start_time": "2020-11-17T13:26:00.115719Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>patient</th>\n",
       "      <th>drug_id</th>\n",
       "      <th>kill</th>\n",
       "      <th>drug_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>HN120</td>\n",
       "      <td>1007</td>\n",
       "      <td>52.625667</td>\n",
       "      <td>Docetaxel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>HN120</td>\n",
       "      <td>133</td>\n",
       "      <td>89.675468</td>\n",
       "      <td>Doxorubicin</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>HN120</td>\n",
       "      <td>201</td>\n",
       "      <td>92.858719</td>\n",
       "      <td>Epothilone B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>HN120</td>\n",
       "      <td>1010</td>\n",
       "      <td>22.197506</td>\n",
       "      <td>Gefitinib</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>HN120</td>\n",
       "      <td>182</td>\n",
       "      <td>76.475047</td>\n",
       "      <td>Obatoclax Mesylate</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  patient drug_id       kill           drug_name\n",
       "0   HN120    1007  52.625667           Docetaxel\n",
       "1   HN120     133  89.675468         Doxorubicin\n",
       "2   HN120     201  92.858719        Epothilone B\n",
       "3   HN120    1010  22.197506           Gefitinib\n",
       "4   HN120     182  76.475047  Obatoclax Mesylate"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Long format: one row per (patient, drug) pair with its kill value.\n",
    "single_drug_pred_df = (\n",
    "    pred_kill_df\n",
    "    .stack()\n",
    "    .rename_axis(['patient', 'drug_id'])\n",
    "    .reset_index(name='kill')\n",
    ")\n",
    "\n",
    "# Attach the drug name; an ID missing from the lookup raises KeyError.\n",
    "single_drug_pred_df['drug_name'] = single_drug_pred_df['drug_id'].map(\n",
    "    lambda drug_id: drug_id_name_dict[drug_id]\n",
    ")\n",
    "single_drug_pred_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Save results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-17T13:26:00.152568Z",
     "start_time": "2020-11-17T13:26:00.142601Z"
    }
   },
   "outputs": [],
   "source": [
    "# The file name carries a '_shifted' suffix when the dosage shift was applied.\n",
    "shift_suffix = '_shifted' if dosage_shifted else ''\n",
    "out_file = 'pred_drug_kill_{}_{}{}.csv'.format(ref_type, model_name, shift_suffix)\n",
    "\n",
    "single_drug_pred_df.to_csv(out_dir + out_file, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
